In [5]:
import numpy as np
from numpy import ma
from pyproj import Geod
from metpy.io.nexrad import Level2File
from metpy.plots import ctables
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import os, tarfile, wget, re
import boto3
from botocore.handlers import disable_signing
import matplotlib.pyplot as plt
%matplotlib inline
In [24]:
# Anonymous (unsigned) access to the public NOAA NEXRAD Level 2 bucket.
s3_client = boto3.client('s3')
resource = boto3.resource('s3')
# Disable signing for anonymous requests to public bucket —
# on BOTH the resource and the client, since the paginator below uses the client.
s3_client.meta.events.register('choose-signer.s3.*', disable_signing)
resource.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)
nexrad_bucket = resource.Bucket('noaa-nexrad-level2')

# Use the paginator: a bare list_objects call returns at most 1000 keys,
# which silently truncates a year's worth of scans. (The original loop body
# was entirely commented out, which is a SyntaxError on a fresh run.)
paginator = s3_client.get_paginator('list_objects')
klot_95 = list()
for page in paginator.paginate(Bucket='noaa-nexrad-level2', Prefix='1995'):
    for result in page.get('Contents', []):
        # Keep only KLOT (Chicago) volume scans.
        if (result['Key'].find('KLOT') != -1) and (result['Key'].endswith('.gz')):
            klot_95.append(result['Key'])
print(klot_95[:5])
In [14]:
# Anonymous S3 access to the public NOAA NEXRAD Level 2 archive.
s3_client = boto3.client('s3')
resource = boto3.resource('s3')
# Disable signing for anonymous requests to public bucket
resource.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)
nexrad_bucket = resource.Bucket('noaa-nexrad-level2')
# Download one KLOT (Chicago) volume scan from 2011-07-23.
# NOTE(review): assumes the local 'nexrad_data/' directory already exists — confirm.
local_filepath = 'nexrad_data/KLOT20110723_074851_V03.gz'
nexrad_bucket.download_file('2011/07/23/KLOT/KLOT20110723_074851_V03.gz', local_filepath)
# Parse the Level 2 archive with MetPy.
f = Level2File(local_filepath)
# Inspect the first ray of the first sweep (rendered as cell output).
f.sweeps[0][0]
Out[14]:
In [4]:
# Re-open the previously downloaded volume scan.
local_filepath = 'nexrad_data/KLOT20110723_074851_V03.gz'
f = Level2File(local_filepath)
# Pull data out of the file
sweep = 0
# First item in ray is header, which has azimuth angle
az = np.array([ray[0].az_angle for ray in f.sweeps[sweep]])
# 5th item is a dict mapping a var name (byte string) to a tuple
# of (header, data array)
ref_hdr = f.sweeps[sweep][0][4][b'REF'][0]
# Gate center ranges: first_gate offset plus gate_width spacing.
# NOTE(review): units come from the MetPy header (presumably km, given the
# *1000 conversion in extract_data below) — confirm against MetPy docs.
ref_range = np.arange(ref_hdr.num_gates) * ref_hdr.gate_width + ref_hdr.first_gate
# Reflectivity (dBZ) for every ray in the sweep: shape (n_rays, n_gates).
ref = np.array([ray[4][b'REF'][1] for ray in f.sweeps[sweep]])
In [5]:
# Turn reflectivity into a masked array so missing gates are not drawn.
data = ma.array(ref)
data[np.isnan(data)] = ma.masked

# Convert (azimuth, range) polar coordinates to Cartesian x/y
# (azimuth measured clockwise from north, hence sin for x, cos for y).
xlocs = ref_range * np.sin(np.deg2rad(az[:, np.newaxis]))
ylocs = ref_range * np.cos(np.deg2rad(az[:, np.newaxis]))

# Plot the data. MetPy's ctables registry holds NWS radar color tables
# (e.g. 'NWSReflectivity'), not matplotlib colormaps, so
# ctables.registry.get_colortable('viridis') raises a lookup error.
# Pass the matplotlib colormap name directly to pcolormesh instead.
plt.pcolormesh(xlocs, ylocs, data, cmap='viridis')
# Equal aspect so the radar scan renders as a circle, not an ellipse
# (plt.set_aspect does not exist; it is an Axes method).
plt.gca().set_aspect('equal', 'datalim')
plt.show()
In [54]:
def extract_data(filepath):
    """Read one NEXRAD Level 2 file and return georeferenced reflectivity.

    Parameters
    ----------
    filepath : str
        Path to a Level 2 archive (.gz) file readable by MetPy's Level2File.

    Returns
    -------
    (lon, lat, data) : tuple of 2-D arrays, shape (n_rays, n_gates)
        Longitude and latitude of each gate, and the reflectivity (dBZ)
        as a masked array with zero values masked out.
    """
    f = Level2File(filepath)
    # Pull data out of the file
    sweep = 0
    # First item in ray is header, which has azimuth angle
    az = np.array([ray[0].az_angle for ray in f.sweeps[sweep]])
    # 5th item is a dict mapping a var name (byte string) to a tuple
    # of (header, data array)
    ref_hdr = f.sweeps[sweep][0][4][b'REF'][0]
    ref_range = np.arange(ref_hdr.num_gates) * ref_hdr.gate_width + ref_hdr.first_gate
    ref = np.array([ray[4][b'REF'][1] for ray in f.sweeps[sweep]])
    # Second item in the ray carries the radar site location (lat/lon).
    data_hdr = f.sweeps[sweep][0][1]
    data = ma.array(ref)
    # Mask zero reflectivity as "no echo".
    # NOTE(review): the plotting cell masks NaN instead of 0 — confirm which
    # sentinel MetPy actually emits for missing gates.
    data[data==0] = ma.masked
    # Data from MetPy needs to be converted to latitude and longitude coordinates
    # Forward geodesic from the radar site on the Clarke 1866 ellipsoid.
    g = Geod(ellps='clrk66')
    center_lat = np.ones([len(az),len(ref_range)])*data_hdr.lat
    center_lon = np.ones([len(az),len(ref_range)])*data_hdr.lon
    # Broadcast azimuths down rows and ranges across columns to (n_rays, n_gates).
    az2D = np.ones_like(center_lat)*az[:,None]
    # Range converted to meters for Geod.fwd (ref_range presumably km — TODO confirm).
    rng2D = np.ones_like(center_lat)*np.transpose(ref_range[:,None])*1000
    lon,lat,back = g.fwd(center_lon,center_lat,az2D,rng2D)
    return lon, lat, data
def unstack_process(lon, lat, data):
    """Flatten 2-D (ray, gate) lon/lat/reflectivity grids into a tidy frame.

    Parameters
    ----------
    lon, lat : 2-D arrays
        Gate longitudes and latitudes, same shape as ``data``.
    data : 2-D array (plain or masked)
        Reflectivity in dBZ; masked/NaN entries are dropped.

    Returns
    -------
    pandas.DataFrame
        Columns ['lat', 'lon', 'precip'], restricted to the Chicago bounding
        box, with 'precip' converted from dBZ to rain rate in mm/hr.
    """
    lat_df = pd.DataFrame(lat)
    lon_df = pd.DataFrame(lon)
    data_df = pd.DataFrame(data)
    # Stack each grid to long form keyed by (x, y) == (ray index, gate index).
    lon_stack = lon_df.stack().reset_index()
    lon_stack = lon_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'lon'})
    lat_stack = lat_df.stack().reset_index()
    lat_stack = lat_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'lat'})
    coord_merge = pd.merge(lat_stack, lon_stack, on=['x', 'y']).reset_index()
    # Reducing to bounding box through selection rather than geospatial operation
    coord_merge = coord_merge.loc[(coord_merge['lat'] <= 42.0231311) &
                                  (coord_merge['lat'] >= 41.644335) &
                                  (coord_merge['lon'] <= -87.524044) &
                                  (coord_merge['lon'] >= -87.940267)]
    data_stack = data_df.stack().reset_index()
    data_stack = data_stack.rename(columns={'level_0': 'x', 'level_1': 'y', 0: 'precip'})
    merged_data = pd.merge(coord_merge, data_stack, on=['x', 'y'], how='left')[['lat','lon','precip']]
    nexrad_df = merged_data.dropna().copy()
    # Convert precip in dBZ into mm/hr using Marshall-Palmer https://en.wikipedia.org/wiki/DBZ_(meteorology)
    # BUG FIX: the original used `nexrad_df.loc['precip'] = ...`, which appends
    # a new ROW labeled 'precip' (all-NaN after column alignment, then removed
    # by the final dropna) instead of replacing the column — so the dBZ values
    # were never actually converted. Assign to the column instead.
    nexrad_df['precip'] = pow(pow(10, nexrad_df['precip']/10)/200, 0.625)
    return nexrad_df.dropna()
def spatial_join(nexrad_df, gpd_file, group_col, file_time):
    """Join gate-level precip onto polygons and write per-group mean precip.

    Parameters
    ----------
    nexrad_df : pandas.DataFrame
        Must contain 'lat', 'lon', and 'precip' columns.
    gpd_file : str
        Path to a file readable by geopandas (e.g. the wards GeoJSON).
    group_col : str
        Polygon attribute to group by (e.g. 'ward'); must be castable to int.
    file_time : str
        Timestamp string used in the output CSV filename.

    Side effects: writes data/nexrad_processed/{group_col}_{file_time}.csv.
    Returns None.
    """
    geo_df = gpd.read_file(gpd_file)
    # NOTE(review): {'init': ...} CRS dicts and the sjoin `op=` keyword are
    # deprecated/removed in newer geopandas/pyproj (use 'EPSG:4326' and
    # `predicate=`); kept as-is assuming a pinned older environment — confirm.
    crs = {'init':'epsg:4326'}
    geo_df.crs = crs
    # Build point geometry per gate from its lon/lat.
    geometry = nexrad_df.apply(lambda z: Point(z['lon'], z['lat']), axis=1).dropna()
    #geometry = [Point(xy) for xy in zip(nexrad_df.lon, nexrad_df.lat)]
    nexrad_geo = gpd.GeoDataFrame(nexrad_df, crs=crs, geometry=geometry)
    nexrad_geo.crs = geo_df.crs
    # Right join keeps every polygon even when no radar gate falls inside it.
    merged_nexrad = gpd.tools.sjoin(nexrad_geo, geo_df, how='right', op='within') #.reset_index()
    nexrad_grouped = merged_nexrad.groupby([group_col])['precip'].mean().reset_index()
    nexrad_grouped[group_col] = nexrad_grouped[group_col].astype(int)
    # Polygons with no gates have NaN mean precip; treat as zero rainfall.
    nexrad_grouped.fillna(value=0, inplace=True)
    nexrad_grouped.sort_values(by=group_col, inplace=True)
    # NOTE(review): assumes data/nexrad_processed/ already exists — confirm.
    nexrad_grouped.to_csv('data/nexrad_processed/{}_{}.csv'.format(group_col, file_time), index=False)
In [57]:
# Extract the YYYYMMDD_HHMMSS timestamp from the filename for output naming.
# NOTE(review): local_filepath comes from an earlier cell — this cell depends
# on notebook execution order.
file_time = re.search(r'\d{8}_\d{6}',local_filepath).group()
lon, lat, data = extract_data(local_filepath)
print(data.shape)
nexrad_df = unstack_process(lon, lat, data)
# Aggregate gate-level precip to Chicago wards and write the per-ward CSV.
spatial_join(nexrad_df, 'data/chicago_wards.geojson', 'ward', file_time)
In [52]:
# NOTE(review): geo_df is only defined INSIDE spatial_join(), never at the
# notebook's top level — this debug cell fails under Restart & Run All.
geo_df.info(verbose=True)
In [53]:
# Debug: schema check of the processed frame (requires the pipeline cell above
# to have run first).
nexrad_df.info(verbose=True)
In [ ]: